/* 
    Purpose: Using the 1960 Census (5% sample), this file locates black and 
             white women aged 30-50 who are heads of household and mothers 
             of a child younger than 18 in the same household. Other 
             variables necessary to create average predicted mother income 
             (income, region, education) are also cleaned.

    Creates: Census1960_5pct_mothers30to50_adjustments.dta
*/
* Session setup: turn off output paging, empty memory, and move to the Census data folder
set more off
clear
cd "$Mydirectory1/1_DataSources/CensusData/"

* Load the raw 1960 5% extract (download from IPUMS USA)
use "./input/Census1960_5pct_raw.dta", clear

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

***************************
** SET UP INCOME VARIABLES
***************************

* Clean the income variables: each variable's sentinel code becomes missing and
* negative amounts are floored at zero.
* (The sentinels are presumably the IPUMS "N/A" codes -- confirm against the codebook.)
	local na_inctot  9999999
	local na_incwage 999999
	local na_ftotinc 9999999   // ftotinc is the Census-provided family income (v1)

	foreach v in inctot incwage ftotinc {
		replace `v' = . if `v' == `na_`v''
		replace `v' = 0 if `v' < 0
	}

* Total wage earnings, by family unit and by household (serial)
	bysort serial famunit: egen fam_earnings = total(incwage)
	bysort serial:         egen hh_earnings  = total(incwage)

* Family income (v2): built by hand as the sum of members' total personal income
	bysort serial famunit: egen fam_income = total(inctot)

* How often is v1 missing while v2 is positive?
    /*Note: Discrepancies appear to come 
            mostly from individuals living 
            in group quarters. */
	count if fam_income > 0 & ftotinc == .

* Harmonize: fall back on v2 wherever v1 is missing and v2 is positive
	replace ftotinc = fam_income if fam_income > 0 & ftotinc == .

* Household income

    // Flag the lowest person number in each family unit so that each family's
    // income enters the household sum exactly once
	bysort serial famunit (pernum): gen fam_head = (_n == 1)

    // Family income carried only on the flagged row; missing family income counts as zero
	gen famhead_inc = cond(ftotinc == ., 0, ftotinc) if fam_head == 1

    // Sum the "separate" families within a serial to get household income
	bysort serial: egen hh_income = total(famhead_inc)
	drop famhead_inc fam_head

/*
   Express income in 1950 dollars using the CPI.
   Source: https://www.minneapolisfed.org/about-us/monetary-policy/inflation-calculator/consumer-price-index-1913-)

   NOTE(review): fam_earnings, hh_earnings and fam_income are NOT rescaled here
   and therefore stay in nominal dollars -- confirm this is intended.
   NOTE(review): the 1960 CPI is applied; if the Census income items refer to
   the prior calendar year, the 1959 index may be the appropriate one -- confirm.
*/
	gen CPI1960 = 29.6
	gen CPI1950= 24.1

	local to_deflate incwage inctot ftotinc hh_income
	foreach v of local to_deflate {
		replace `v' = `v' * (CPI1950 / CPI1960)
	}
	
*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

***************************
** IDENTIFY MOTHERS
***************************
  
	sort serial pernum

* Count how many distinct mothers of children under 18 live in each household.
* momloc holds the person number of a person's mother; after the two replaces
* below it is non-missing only for children under 18 whose mother is present.
	replace momloc=. if momloc==0 //don't count people w/o mother in house 
	replace momloc=. if age>=18   //only children under 18 are used to identify a mother
	
	gen mother_inHH = momloc<.
	egen tag = tag(serial momloc) //1 on one row per distinct (serial, momloc); rows with missing momloc get 0
	egen number_mothers = total(tag), by(serial)
	replace number_mothers=. if number_mothers==0
	label var number_mothers "Number of mothers in household"	
	tab number_mothers, m 

* Tag each mother in a household
*   Pass x picks, within each household, the lowest mother person number not yet
*   handled (stored in mother_x), flags the row whose pernum equals it, and then
*   blanks that person number so the next pass finds the next mother.
*   The loop runs from 1 to the LARGEST number of mothers observed in any
*   household (not once per distinct value of number_mothers), so every mother is
*   reached even when the observed counts are not contiguous (e.g. 1, 2 and 4).
	gen mom_test = momloc //person number of mother
	gen mother =.
	
	quietly summarize number_mothers, meanonly
	if r(N) > 0 { //skip entirely when no household has a mother of a child under 18
		local max_mothers = r(max)
		forvalues x = 1/`max_mothers' {
			//bysort (not by): egen may change the sort order, so re-sort on every pass
			bysort serial: egen mother_`x' = min(mom_test) //Assign person number of mother with lowest person number
			replace mom_test =. if mother_`x' == mom_test //Move on to next mother in household
			replace mother=1 if mother_`x' == pernum
		}
	}
	drop mom_test
	
* Keep mothers
	keep if mother==1
	label var mother "Mother with child under 18 in household"
	
* Keep mothers who are heads of household
	tab relate
	keep if relate==1

* Keep black and white mothers aged 30-50
	keep if age>=30 & age<=50
	keep if race==1 | race==2
	tab sex //diagnostic only: no restriction on sex is applied here
	
*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

* Region of current residence, collapsed from the detailed region codes into four groups
	* Northeast: Connecticut, Maine, Massachusetts, New Hampshire, Rhode Island, Vermont, New Jersey, New York, Pennsylvania
	* Midwest: Illinois, Indiana, Michigan, Ohio, Wisconsin, Iowa, Kansas, Minnesota, Missouri, Nebraska, North Dakota, South Dakota
	/* South: Delaware, District of Columbia, Florida, Georgia, Maryland, North Carolina, South Carolina, Virginia, West Virginia, Alabama,
			  Kentucky, Mississippi, Tennessee, Arkansas, Louisiana, Oklahoma, Texas */
	* West: Arizona, Colorado, Idaho, Montana, Nevada, New Mexico, Utah, Wyoming, California, Oregon, Washington --note: Census puts AK and HI in with Pacific division

	gen region_merge = 1 if inlist(region, 11, 12)               //Northeast
	replace region_merge = 2 if inlist(region, 21, 22)           //Midwest
	replace region_merge = 3 if region >= 31 & region <= 33      //South
	replace region_merge = 4 if inlist(region, 41, 42)           //West

	* Check the mapping: any region code not listed above is left missing
	tab region, m
	tab region_merge, m

	label define region_l 1 "NORTHEAST" 2 "MIDWEST" 3 "SOUTH" 4 "WEST"
	label values region_merge region_l
	tab region_merge, m

	* Indicator for the South (0 for every other row, including missing region_merge)
	gen south_merge = (region_merge == 3)

* Education, grouped into five categories from the detailed educd code
*   1 = <grade school, 2 = 8th grade, 3 = <hs, 4 = hs, 5 = >hs ("999" is missing)
*   Codes that match none of the conditions are left missing.
	gen edu = cond(educd <= 25, 1,                      ///
	          cond(educd == 26, 2,                      ///
	          cond(inlist(educd, 30, 40, 50), 3,        ///
	          cond(educd == 60, 4,                      ///
	          cond(educd >= 65 & educd < 999, 5, .)))))
	tab edu, m
	

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*
	
****************
**** SAVE 
****************

	* Shrink storage types where possible, then write the mothers file (overwriting any existing copy)
	compress
	save "./input/Census1960_5pct_mothers30to50_adjustments.dta", replace
	
